/*
 * Deal with the FMS
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <netinet/in.h>

#include "libfma.h"
#include "lf_fms_comm.h"
#include "lf_fma_comm.h"
#include "lf_channel.h"
#include "lf_scheduler.h"
#include "lf_fabric.h"
#include "lf_topo_map.h"
#include "lf_fms.h"
#include "lf_myri_packet.h"
#include "lf_fma_flags.h"
#include "libmyri.h"

#include "fma.h"
#include "fma_fms.h"
#include "fma_myri.h"
#include "fma_fabric.h"
#include "fma_map.h"
#include "fma_resolve.h"
#include "fma_verify.h"
#include "fma_standalone.h"
#include "fma_settings.h"
#include "fma_proxy.h"
#include "fma_tunnel.h"

/*
 * local functions
 */
/* socket connection management */
static void try_fms_connection(void *);
static void lost_fms_conn(struct lf_channel *);
/* receive path: header-arrived callback, then body-arrived callback */
static void start_fms_message(struct lf_channel *);
static void finish_fms_message(struct lf_channel *);
/* completion / error callbacks handed to fma_tunnel_send() by fma_fms_send() */
static void fma_fms_send_complete(void *);
static void fma_fms_send_error(void *);
/* completion / error callbacks handed to fma_tunnel_send() when forwarding
 * an FMS message out onto the Myrinet */
static void fma_proxy_tunnel_error(void *vmsg);
static void fma_proxy_tunnel_complete(void *vmsg);
/* helpers for the FMS-to-FMA forwarding path and map redistribution */
static void fma_restore_or_free_fms_buf(void *buf, int length);
static void fma_forward_msg_from_fms(struct lf_proxy_fms_to_fma *msg,
  int length);
static void fma_maybe_redistribute_map(void);

/*
 * Allocate the global FMS state (A.fms) and mark it as having no socket.
 *
 * Returns 0 on success, -1 if LF_CALLOC bails out to the except label.
 */
int
fma_init_fms_vars()
{
  int rc;

  rc = -1;	/* stays -1 unless every step below completes */

  LF_CALLOC(A.fms, struct fma_fms, 1);
  A.fms->socket = -1;
  rc = 0;

 except:
  return rc;
}

/*
 * Take a comma-separated list of hostnames and build the FMS host array.
 *
 * list - comma-separated hostnames; handed to line2words() for splitting
 *
 * Returns 0 on success, -1 on error.  A.fms is left consistent in both
 * error cases: a parse failure leaves the previous list untouched, and an
 * allocation failure leaves an empty list (never a freed pointer paired
 * with a non-zero count).
 */
int
fma_set_fms_list(
  char *list)
{
  char *w[LF_STRING_LEN];
  int wc;

  wc = 0;
  line2words(list, w, ",", 0, &wc);
  if (wc < 1) {
    LF_ERROR(("Parsing fms host list"));
  }

  /* free old hostname list, and forget it right away so that a failure
   * below cannot leave a dangling pointer behind */
  if (A.fms->num_hosts > 0) {
    lf_free_string_array(A.fms->host_list, A.fms->num_hosts);
    A.fms->host_list = NULL;
    A.fms->num_hosts = 0;
  }

  /* create new hostname list (wc >= 1 is guaranteed at this point) */
  A.fms->host_list = lf_dup_string_array(w, wc);
  if (A.fms->host_list == NULL) LF_ERROR(("Allocating FMS hostname list"));
  A.fms->num_hosts = wc;

  /* the new list may be shorter - keep the current index inside it */
  if (A.fms->host_index >= A.fms->num_hosts) {
    A.fms->host_index = 0;
  }

  return 0;

 except:
  return -1;
}

/*
 * Create the channel that will carry the FMS socket connection and hang
 * it off the global FMS state.
 *
 * Returns 0 on success, -1 if the channel cannot be allocated.
 */
int
fma_init_fms()
{
  struct lf_channel *new_chan;
  struct fma_fms *fp;

  fp = A.fms;

  LF_CALLOC(new_chan, struct lf_channel, 1);

  /* the channel carries the FMS state as context; hangups on it are
   * routed to lost_fms_conn() */
  new_chan->context = fp;
  new_chan->hangup_rtn = lost_fms_conn;
  fp->chp = new_chan;

  return 0;

 except:
  return -1;
}

/*
 * Called once the Myrinet is ready; kicks off FMS connection attempts.
 *
 * Returns 0 if at least one FMS host is configured and a connection
 * attempt was started, -1 if no hosts are configured.
 */
int
fma_fms_myrinet_ready()
{
  struct fma_fms *fp;

  fp = A.fms;

  /* no server specified - pass failure back to the caller */
  if (fp->num_hosts <= 0) {
    return -1;
  }

  /* start counting attempts from scratch and make the first one now */
  fp->connect_tries = 0;
  try_fms_connection(NULL);
  return 0;
}

/*
 * Try to connect to an FMS.
 * As long as we are disconnected, we will retry connecting to an FMS
 * via a socket.  It is possible, after we have established communication
 * to other FMAs, that we may be able to speak to an FMS through our
 * Myrinet connection, but keep trying for the socket connection anyhow, it
 * is preferable.
 *
 * The retry on the socket communication is accomplished by scheduling an event
 * in the future every time we fail to connect or lose connection.
 */
static void
try_fms_connection(
  void *x)
{
  struct lf_channel *chp;
  struct fma_fms *fms;
  uint8_t conn_type;
  int rc;

  fms = A.fms;

  if (A.debug) {
    fma_log("trying connection to %s", fms->host_list[fms->host_index]);
  }

  fms->socket = lf_connect_to_host(fms->host_list[fms->host_index],
				   FMS_SERVICE_PORT);

  /* if no connection, try again later */
  if (fms->socket == -1) goto retry;

  /* fill in the channel */
  chp = fms->chp;
  chp->fd = fms->socket;

  if (lf_add_channel(chp) != 0) LF_ERROR(("Adding channel"));

  /* set up to receive messages from FMS */
  lf_channel_receive(chp, &fms->header, sizeof(fms->header),
                     start_fms_message);

  /* let the FMS know this is an FMA connecting */
  conn_type = FMS_CONN_FMA;
  rc = lf_write(fms->socket, &conn_type, sizeof(conn_type));
  if (rc != sizeof(conn_type)) goto retry;

  /* got connected, retries is 0 for next time */
  fms->connect_tries = 0;

  /* all set to go! */
  fma_enter_fms_mode();

  /* send startup messages */
  if (fma_send_fma_version() == -1) goto retry;
  if (fma_send_myri_info() == -1) goto retry;

  fma_log("connection established"); /* XXX */
  return;

  /*
   * could not connect to FMS - reset error codes, advance to next FMS in the
   * list, and schedule a retry
   */
 retry:
  /* bump retry count.  If too high, enter standalone mode */
  if (A.run_state != FMA_RUN_STANDALONE) {
    ++fms->connect_tries;
    if (!A.fms_required &&
	fms->connect_tries > FMA_FMS_CONNECT_TRY_BEFORE_STANDALONE) {
      fma_enter_standalone_mode();
    }
  }

  /* next FMS in list */
  ++fms->host_index;
  if (fms->host_index >= fms->num_hosts) {
    fms->host_index = 0;
  }

  fma_reset_errors();

  /* make sure the socket is closed */
  if (fms->socket != -1) {
    close(fms->socket);
    fms->socket = -1;
  }

  /* schedule a retry */
  if (lf_schedule_event(try_fms_connection, NULL, FMA_FMS_RETRY_TIME) == NULL) {
    LF_ERROR(("Scheduling fms connect event"));
  }
  return;

 except:
  fma_perror();
  fma_exit(1);
}

/*
 * Called when we lose connection to the FMS server
 */
static void
lost_fms_conn(
  struct lf_channel *chp)
{
  fma_log("Lost FMS connection, will try to reconnect");

  /* Don't free things more than once if FMS is gone */
  if (A.fms->socket == -1) {
    return;
  }

  /* remove the channel from service and retry the connection */
  lf_remove_channel(chp);

  close(A.fms->socket);
  A.fms->socket = -1;

  /* We no longer have FMS contact */
  fma_reset_flag_bits(FMA_FLAG_HAS_FMS);

  /* next FMS in list */
  ++A.fms->host_index;
  if (A.fms->host_index >= A.fms->num_hosts) {
    A.fms->host_index = 0;
  }

  /* schedule a retry */
  if (lf_schedule_event(try_fms_connection, NULL, FMA_FMS_RETRY_TIME) == NULL) {
    LF_ERROR(("Scheduling fms connect event"));
  }
  return;

 except:
  fma_perror();
  fma_exit(1);
}

/*
 * Tell the FMS what version of software we are running, along with our
 * hostname.
 *
 * Returns 0 if the message was handed to fma_fms_send() successfully,
 * -1 otherwise.
 */
int
fma_send_fma_version()
{
  struct fma_fms_ident_msg msg;
  int rc;

  /* the whole struct goes on the wire, so start from all zeroes rather
   * than whatever happens to be on the stack */
  memset(&msg, 0, sizeof(msg));

  /* fill in FMA version number */
  msg.fma_version_32 = htonl(LF_VERSION);

  /* fill in host info; bounded copy, the terminating NUL is supplied by
   * the memset above */
  strncpy(msg.hostname, A.hostname, sizeof(msg.hostname) - 1);

  /* send message to FMS */
  rc = fma_fms_send(FMA_FMS_IDENT_MSG, &msg, sizeof(msg), FALSE, NULL, NULL);
  if (rc != 0) return -1;

  return 0;
}

/*
 * Tell the FMS that the map I have received is invalid.
 *
 * map_version - version of the map being rejected
 * use_me      - copied into the message's minv_use_me_8 field
 * why         - human-readable reason for the rejection
 *
 * A send failure is not reported to the caller (this routine returns
 * void); fma_fms_send() does its own error reporting.
 */
void
fma_fms_map_is_invalid(
  int map_version,
  int use_me,
  lf_string_t why)
{
  struct fma_fms_map_invalid_msg msg;

  /* the whole struct goes on the wire, so start from all zeroes rather
   * than whatever happens to be on the stack */
  memset(&msg, 0, sizeof(msg));

  msg.minv_map_version_32 = htonl(map_version);
  msg.minv_use_me_8 = use_me;

  /* copy in the reason; bounded copy, the terminating NUL is supplied by
   * the memset above */
  strncpy(msg.why, why, sizeof(msg.why) - 1);

  /* send this message to FMS */
  (void) fma_fms_send(FMA_FMS_MAP_IS_INVALID, &msg, sizeof(msg),
      FALSE, NULL, NULL);
}

/*
 * Called when we get a message header from the FMS.
 *
 * chp - the FMS channel; its context is the struct fma_fms holding the
 *       header that was just received
 *
 * A zero-length message is dispatched immediately.  Otherwise the receive
 * buffer is grown if needed and a receive for the body is posted, with
 * finish_fms_message() as its completion routine.
 *
 * A bad length or an allocation failure is fatal (fma_perror_exit(1)).
 */
static void
start_fms_message(
  struct lf_channel *chp)
{
  struct fma_fms *fms;
  uint32_t wire_len;
  int len;
  void *p;

  fms = chp->context;

  /* The length comes straight off the wire.  Reject anything that does not
   * fit in a non-negative int: a negative value would slip past the
   * buffer-size comparison below and be handed to lf_channel_receive().
   */
  wire_len = ntohl(fms->header.length_32);
  if (wire_len > (uint32_t)0x7fffffff) {
    LF_ERROR(("Bad message length from FMS"));
  }
  len = (int)wire_len;

  /* if message length is 0, just handle it */
  if (len == 0) {

    finish_fms_message(chp);

  /* non-zero-length message, get the rest */
  } else {

    /* make sure message buffer is big enough to hold incoming message */
    if (len > fms->fms_msgbuf_size) {
      p = realloc(fms->fms_msgbuf, len);
      if (p == NULL) LF_ERROR(("Error allocating space for incoming message"));

      fms->fms_msgbuf = (union lf_fma_message *)p;
      fms->fms_msgbuf_size = len;
    }

    /* get the rest of the message */
    lf_channel_receive(chp, fms->fms_msgbuf, len, finish_fms_message);
  }
  return;

 except:
  fma_perror_exit(1);
}

/*
 * Completion routine for a whole FMS message (header plus body, if any).
 * Dispatches the message and then posts a receive for the next header.
 *
 * chp - the FMS channel; its context is the struct fma_fms holding the
 *       header and message buffer
 */
static void
finish_fms_message(
  struct lf_channel *chp)
{
  struct fma_fms *fp;
  int msg_len;
  int msg_type;

  fp = chp->context;

  /* header fields are in network byte order */
  msg_len = ntohl(fp->header.length_32);
  msg_type = ntohl(fp->header.msg_type_32);

  /* hand the message off for processing */
  fma_handle_message(msg_type, msg_len, fp->fms_msgbuf);

  /* go back to waiting for a header */
  lf_channel_receive(chp, &fp->header, sizeof(fp->header),
                     start_fms_message);
}

/*
 * Handle an incoming message from the FMS.
 *
 * type   - message type (host byte order)
 * length - length of the message body in bytes
 * msg    - the message body
 *
 * An LF_FMA_TERMINATE request or an unknown message type ends in
 * fma_perror_exit(1).
 */
void
fma_handle_message(
  int type,
  int length,
  union lf_fma_message *msg)
{
  int rc;

  /* switch on message type */
  switch (type) {
  case LF_FMA_TERMINATE:
    {
      char buf[2*LF_STRING_LEN];

      /* bounded format - the reason text comes off the wire */
      snprintf(buf, sizeof(buf), "Exiting at request of FMS, reason:\n\t%s",
	       msg->terminate.reason);

      /* pass the text as an argument; FMS-supplied text must never be
       * used as the format string itself */
      LF_ERROR(("%s", buf));
    }
    break;

  case LF_FMA_TOPO_MAP:
    if (length > 0) {

      /* first, clear all old probes */
      fma_clear_verify_probes();

      /* Load and route the map */
      rc = fma_copy_and_load_map((struct lf_topo_map *)msg, length);

      /* Think about redistributing if map seems good */
      if (rc == 0) {
	fma_maybe_redistribute_map();
      }

      /* make verify assignments in 30 seconds */
      (void) lf_schedule_event((void ((*)(void *)))fma_assign_verifies,
			       NULL, 30*1000);

    } else {
      fma_log("Ignoring zero-length map");
    }
    break;

  case LF_FMA_SEND_INVALID_ROUTE:
    if (length > 0) {

      rc = fma_send_inv_rts(&msg->inv_rt);

      if (rc != 0) {
        fma_log("invalid route send failed for xbar %d",
            ntohl(msg->inv_rt.ir_xbar_index_32));
      }
    } else {
      fma_perror();
    }
    break;

  case LF_FMA_TOPO_LINK_STATE:
    fma_load_link_state((lf_topo_link_state_t *)msg);

    /* calculate the routes */
    rc = fma_route_topo_map();
    if (rc != 0) {
      fma_perror();
    } else {
      fma_log("routes re-set");
    }

    /* possibly redistribute map */
    fma_maybe_redistribute_map();

    break;

  case LF_FMA_SETTINGS:
    fma_fms_set_settings(&msg->settings);
    break;

  case LF_FMA_MAP_FABRIC:
    if (A.debug) fma_log("got FMS request to map fabric");

    fma_start_mapping_fabric(fma_mf_send_map_to_fms);
    break;

  /* forward a message from the FMS to another FMA */
  case LF_FMA_PROXY_FMS_TO_FMA:
    fma_forward_msg_from_fms((struct lf_proxy_fms_to_fma *)msg, length);
    break;

  default:
    LF_ERROR(("Unknown message type from FMS"));
    break;
  }
  return;

 except:
  fma_perror_exit(1);
}

/*
 * Report a NIC error
 */
void
fma_fms_report_nic_error(
  int nic_id,
  enum myri_error_type error)
{
  struct fma_fms_nic_error_msg msg;
  int sock;
  int rc;

  sock = A.fms->socket;

  /* Make sure there is someone to talk to */
  if (sock == -1) {
    LF_ERROR(("No FMS to which to report NIC error"));
  }

  /* record the NIC ID */
  msg.nic_id_32 = htonl(nic_id);
  msg.error_32 = htonl(error);

  /* send this message to FMS */
  rc = fma_fms_send(FMA_FMS_NIC_ERROR, &msg, sizeof(msg), FALSE, NULL, NULL);
  if (rc != 0) LF_ERROR(("Error reporting NIC error to FMS"));

  return;

 except:
  fma_perror();
}

/*
 * badcrc threshold exceeded
 */
void
fma_fms_nic_badcrc(
  int nic_id,
  int port,
  int badcrcs)
{
  struct fma_fms_nic_badcrc_msg msg;
  int sock;
  int rc;

  sock = A.fms->socket;

  /* Make sure there is someone to talk to */
  if (sock == -1) {
    LF_ERROR(("No FMS to which to report badcrc alert"));
  }

  /* record the NIC ID */
  msg.nic_id_32 = htonl(nic_id);
  msg.port_32 = htonl(port);
  msg.badcrcs_32 = htonl(badcrcs);

  /* send this message header to FMS */
  rc = fma_fms_send(FMA_FMS_NIC_BADCRC, &msg, sizeof(msg),
      FALSE, NULL, NULL);
  if (rc != 0) LF_ERROR(("Error reporting NIC badcrcs to FMS"));

  return;

 except:
  fma_perror();
}

/*
 * Write a block of data to the FMS.
 * Use a socket if we have one, else use our proxy if we don't.
 *
 * type      - FMS message type
 * addr      - message body (may be NULL on the socket path, in which case
 *             only the header is written)
 * length    - length of the message body in bytes
 * is_static - proxy path only: if true, addr is handed to the tunnel as-is
 *             and must stay valid until the send finishes; if false, a
 *             private copy is made and freed when the tunnel send is done
 * callback  - optional routine to call once the message has been sent
 * context   - argument for callback
 *
 * Returns 0 on success, -1 on error.  An error on the socket path also
 * tears the connection down through lost_fms_conn().
 */
int
fma_fms_send(
  int type,
  void *addr,
  int length,
  int is_static,
  void (*callback)(void *),
  void *context)
{
  struct fma_proxy_msg *pmp;
  struct fma_myri_packet *hdr;
  int hdrlen;
  int sock;
  void *rv;
  int rc;

  pmp = NULL;
  hdr = NULL;

  /* If we have a direct socket connection, use it and schedule the callback
   * immediately.
   */
  sock = A.fms->socket;
  if (sock != -1) {

    /* send the message header */
    rc = lf_send_fms_header(sock, type, length);
    if (rc != 0) LF_ERROR(("Error sending FMS header"));

    /* send the message itself, if there is one */
    if (addr != NULL) {
      rc = lf_write(sock, addr, length);
      if (rc != length) LF_ERROR(("Error sending FMS message"));
    }

    /* schedule callback immediately if specified */
    if (callback != NULL) {
      rv = lf_schedule_event(callback, context, 0);
      if (rv == NULL) LF_ERROR(("Error scheduling write callback"));
    }

  /* No socket, try using proxy tunnel */
  } else if (A.proxy != NULL) {
    struct fma_proxy *pp;
    void *msg;

    pp = A.proxy;

    /* allocate a descriptor for this message */
    LF_CALLOC(pmp, struct fma_proxy_msg, 1);

    /* fill in message buffer.  if memory needs to be allocated, the address
     * is saved for freeing once the tunnel send is complete
     */
    if (is_static) {
      msg = addr;
    } else {
      LF_CALLOC(msg, char, length);
      pmp->msg = msg;
      if (length > 0) {
	memcpy(msg, addr, length);
      }
    }
    pmp->callback = callback;
    pmp->context = context;

    /* allocate a header */
    LF_CALLOC(hdr, struct fma_myri_packet, 1);
    hdrlen = sizeof(struct lf_myri_packet_hdr)
	  + sizeof(struct fma_proxy_fma_to_fms_hdr);
    pmp->hdr = hdr;

    hdr->h.type_16 = htons(FMA_PACKET_TYPE);
    hdr->h.subtype_16 = htons(FMA_SUBTYPE_PROXY_FMA_TO_FMS);

    hdr->u.proxy_fma_to_fms.h.proxy_client_id_32 = htonl(pp->client_id);

    /* bounded copy; hdr came from LF_CALLOC so the field is already
     * zero-filled and stays NUL-terminated */
    strncpy(hdr->u.proxy_fma_to_fms.h.hostname, A.hostname,
	sizeof(hdr->u.proxy_fma_to_fms.h.hostname) - 1);
    hdr->u.proxy_fma_to_fms.h.msg_type_32 = htonl(type);
    hdr->u.proxy_fma_to_fms.h.length_32 = htonl(length);

    lf_reverse_route(hdr->u.proxy_fma_to_fms.h.return_route,
	pp->route, pp->route_len);
    hdr->u.proxy_fma_to_fms.h.return_route_len_8 = pp->route_len;

    fma_log("sending FMS via proxy, type=%d, len=%d", type, length);

    /* from here on the descriptor, header and any private copy of the
     * message are released by fma_fms_send_complete/fma_fms_send_error */
    (void) fma_tunnel_send(hdr, hdrlen, msg, length,
	pp->route, pp->route_len, pp->nip, pp->port,
	fma_fms_send_complete, fma_fms_send_error, pmp);

  } else {
    LF_ERROR(("No way to talk to FMS!"));
  }

  return 0;

 except:
  fma_perror();
  if (sock != -1) {
    lost_fms_conn(A.fms->chp);
  }

  /* release the private message copy too, if one was made before the
   * failure - freeing only the descriptor would leak it */
  if (pmp != NULL) {
    LF_FREE(pmp->msg);
  }
  LF_FREE(pmp);
  LF_FREE(hdr);
  return -1;
}


/*
 * Tunnel send to the FMS (via proxy) finished successfully.
 * Runs the caller's callback, if there is one, and then releases the
 * message descriptor together with its header and message copy.
 *
 * vpmp - the struct fma_proxy_msg describing the send
 */
static void
fma_fms_send_complete(
  void *vpmp)
{
  struct fma_proxy_msg *done = vpmp;

  /* notify whoever asked for the send */
  if (done->callback != NULL) {
    (*done->callback)(done->context);
  }

  /* release everything that was allocated for this send */
  LF_FREE(done->hdr);
  LF_FREE(done->msg);
  LF_FREE(done);
}

/*
 * Tunnel send to the FMS (via proxy) failed.
 * Releases the message descriptor and, unless we are already in standalone
 * mode, drops back to it.  The caller's callback is not invoked.
 *
 * vpmp - the struct fma_proxy_msg describing the send
 */
static void
fma_fms_send_error(
  void *vpmp)
{
  struct fma_proxy_msg *failed = vpmp;

  /* release everything that was allocated for this send */
  LF_FREE(failed->hdr);
  LF_FREE(failed->msg);
  LF_FREE(failed);

  /* nothing more to do if standalone mode is already in effect */
  if (A.run_state == FMA_RUN_STANDALONE) {
    return;
  }

  fma_log("Lost contact with proxy server, entering standalone mode");
  fma_enter_standalone_mode();
}

/*
 * Switch to being a directly-connected agent of the FMS.
 * Cancels standalone or proxy mode if one of them is in effect, then sets
 * the run state, the mapping level and the "has FMS" flag.
 */
void
fma_enter_fms_mode()
{
  if (A.debug) {
    fma_log("enter FMS mode");
  }

  /* undo whatever mode we are coming from; other states need no cleanup */
  if (A.run_state == FMA_RUN_STANDALONE) {
    fma_cancel_standalone_mode();
  } else if (A.run_state == FMA_RUN_PROXY_PENDING
	     || A.run_state == FMA_RUN_FMS_VIA_PROXY) {
    fma_cancel_proxy_mode();
  }

  /* now running with a direct FMS connection */
  A.run_state = FMA_RUN_FMS_DIRECT;
  fma_set_mapping_level(FMA_FMS_MAPPER_LEVEL);
  fma_set_flag_bits(FMA_FLAG_HAS_FMS);
}

/*
 * Forward a message from out on the Myrinet to the FMS.
 *
 * nip  - NIC the packet arrived on
 * port - port the packet arrived on
 * pkt  - the proxied packet (header fields in network byte order)
 *
 * The packet is dropped, with the error printed, if we are not directly
 * connected to the FMS or if its header fields are out of range.  A
 * failure while writing to the socket additionally tears the FMS
 * connection down through lost_fms_conn().
 */
void
fma_proxy_msg_to_fms(
  struct fma_nic_info *nip,
  int port,
  struct fma_proxy_fma_to_fms *pkt)
{
  struct lf_proxy_fma_to_fms_hdr hdr;
  uint32_t wire_len;
  int length;
  int rc;
  int fatal;
  int sock;

  fatal=FALSE;

  /* Make sure we know how to forward this */
  if (A.run_state != FMA_RUN_FMS_DIRECT) {
    LF_ERROR(("FMS forwarding requested, not directly connected"));
  }

  /* double-check that our socket is engaged! */
  sock = A.fms->socket;
  if (sock == -1) {
    LF_ERROR(("FMS forwarding requested, socket not open"));
  }

  /* The packet came in off the network, so check its fields before using
   * them as copy lengths: the route must fit in our header and the payload
   * length must be representable as a non-negative int.
   */
  if ((size_t)pkt->h.return_route_len_8 > sizeof(hdr.return_route)) {
    LF_ERROR(("Bad return route length in proxied message"));
  }
  wire_len = ntohl(pkt->h.length_32);
  if (wire_len > (uint32_t)0x7fffffff) {
    LF_ERROR(("Bad length in proxied message"));
  }
  length = (int)wire_len;

  /* Fill in the header for FMS.  Start from zeroes so that no
   * uninitialized stack bytes are sent, and so the bounded hostname copy
   * is always NUL-terminated.
   */
  memset(&hdr, 0, sizeof(hdr));
  hdr.proxy_client_id_32 = pkt->h.proxy_client_id_32;
  strncpy(hdr.hostname, pkt->h.hostname, sizeof(hdr.hostname) - 1);
  memcpy(hdr.return_route, pkt->h.return_route, pkt->h.return_route_len_8);
  hdr.return_route_len_8 = pkt->h.return_route_len_8;
  hdr.nic_index_8 = nip->nic_index;
  hdr.port_8 = port;
  hdr.msg_type_32 = pkt->h.msg_type_32;
  hdr.length_32 = pkt->h.length_32;
  fma_log("forwarding msg to fms - type=%d, len=%d",
      ntohl(pkt->h.msg_type_32), ntohl(pkt->h.length_32));

  /*
   * Send the message - 3 parts.  Any failure from here on leaves the
   * socket stream in an unknown state, so it is treated as fatal to
   * the connection.
   */
  fatal = TRUE;
  rc = lf_send_fms_header(sock, FMA_FMS_PROXY_FMA_TO_FMS, sizeof(hdr)+length);
  if (rc != 0) LF_ERROR(("Error sending FMS header"));
  rc = lf_write(sock, &hdr, sizeof(hdr));
  if (rc != sizeof(hdr)) {
    LF_ERROR(("Error sending proxied message header to FMS"));
  }
  rc = lf_write(sock, pkt->data, length);
  if (rc != length) LF_ERROR(("Error sending proxied message to FMS"));
  return;

 except:
  if (fatal) {
    lost_fms_conn(A.fms->chp);
  }
  fma_perror();
}

/*
 * Forward a message from the FMS to an FMA through the Myrinet.  We
 * just take the body and tunnel it to the target FMA.  The buffer needs to
 * be kept around for tunnel re-sends, but we'll not copy it by just stealing
 * the incoming FMS buffer; the tunnel completion and error callbacks free
 * it when the send is over.
 *
 * msg    - the proxied message; must be the current FMS receive buffer
 * length - total length of msg in bytes, proxy header included
 *
 * A malformed message is fatal (fma_perror_exit(1)).
 */
static void
fma_forward_msg_from_fms(
  struct lf_proxy_fms_to_fma *msg,
  int length)
{
  struct fma_nic_info *nip;
  int port;
  struct lf_myri_packet_hdr hdr;
  int nic_index;
  struct fma_fms *fms;

  /* The message must at least hold the proxy header: the payload length
   * below is computed by subtracting the header size, and nothing in the
   * header can be trusted (or even read) if it is not all there.
   */
  if (msg == NULL
      || length < (int)sizeof(struct lf_proxy_fms_to_fma_hdr)) {
    LF_ERROR(("Short lf_proxy_fms_to_fma message"));
  }

  /* Use index to get NIC pointer */
  nic_index = msg->h.nic_index_8;
  if (nic_index < 0 || nic_index >= A.myri->num_nics) {
    LF_ERROR(("Bad NIC index in lf_proxy_fms_to_fma"));
  }
  nip = A.myri->nic_info[nic_index];
  port = msg->h.port_8;

  /* Steal the FMS message buffer; the receive path will allocate a fresh
   * one when the next non-empty message arrives.
   */
  fms = A.fms;
  if ((void *)msg != (void *)fms->fms_msgbuf) {
    LF_ERROR(("Bad assumption!"));
  }
  fms->fms_msgbuf_size = 0;
  fms->fms_msgbuf = NULL;

  /* build packet header (host to network byte order) */
  hdr.type_16 = htons(FMA_PACKET_TYPE);
  hdr.subtype_16 = htons(FMA_SUBTYPE_PROXY_FMS_TO_FMA);

  /* send the message */
  (void) fma_tunnel_send(&hdr, sizeof(hdr),
      &msg->eh, length-sizeof(struct lf_proxy_fms_to_fma_hdr),
      msg->h.route, msg->h.route_len_8,
      nip, port,
      fma_proxy_tunnel_complete, fma_proxy_tunnel_error, msg);
  return;

 except:
  fma_perror_exit(1);
}

/*
 * Take the pointer and length and put them back as the FMS incoming buffer
 * if there currently is no incoming buffer; otherwise free the buffer.
 *
 * buf    - buffer previously taken from the FMS receive path
 * length - allocated size of buf in bytes
 *
 * Either way the caller gives up ownership of buf.
 */
static void
fma_restore_or_free_fms_buf(
  void *buf,
  int length)
{
  struct fma_fms *fms;

  fms = A.fms;
  if (fms->fms_msgbuf == NULL) {
    fms->fms_msgbuf = (union lf_fma_message *)buf;
    fms->fms_msgbuf_size = length;
  } else {
    LF_FREE(buf);
  }

  /* Done.  This routine must not call itself: buf has just been either
   * handed over or freed, so there is nothing left to process.
   */
}

/*
 * Completion callback for a message forwarded from the FMS out through
 * the tunnel: the send is over, so release the message buffer.
 *
 * vmsg - the struct lf_proxy_fms_to_fma buffer that was sent
 */
static void
fma_proxy_tunnel_complete(
  void *vmsg)
{
  struct lf_proxy_fms_to_fma *sent = vmsg;

  LF_FREE(sent);
}

/*
 * Error callback for a message forwarded from the FMS out through the
 * tunnel: log the failure and release the message buffer.
 *
 * vmsg - the struct lf_proxy_fms_to_fma buffer that could not be sent
 */
static void
fma_proxy_tunnel_error(
  void *vmsg)
{
  struct lf_proxy_fms_to_fma *unsent = vmsg;

  fma_log("message forwarding to FMS tunnel failed");

  LF_FREE(unsent);
}

/*
 * Callback passed to fma_distribute_topo_map(); only logs, and only when
 * debugging is enabled.
 */
static void
fma_map_redist_done()
{
  if (!A.debug) {
    return;
  }

  fma_log("FMS Map redistribution complete");
}

/*
 * Pass the current map along to any hosts that need it, i.e. hosts that
 * do not have FMS contact.  fma_map_redist_done() is supplied as the
 * callback to fma_distribute_topo_map().
 */
static void
fma_maybe_redistribute_map(void)
{
  fma_distribute_topo_map(fma_map_redist_done);
}
